library(dplyr)
library(ggplot2)
library(ggforce)
library(gganimate)
library(tidyr)
library(caret)
library(DT)
library(summarytools)
library(gifski)
library(png)
Aby zapewnić powtarzalność wyników, ustawiamy ziarno generatora liczb losowych.
# Fix the RNG seed so the random data partition and the cross-validation
# resampling performed later in this script are reproducible.
set.seed(seed = 123)
Dane rozdzielone są znakiem “;”, dlatego do ich wczytania użyjemy funkcji read.csv2. Kolumna title jest połączeniem kolumn pdb_code, res_name, res_id oraz chain_id, zatem możemy ją usunąć podczas wczytywania, aby uniknąć powtarzania informacji. W kodzie poniżej usuwany jest cały zakres kolumn od blob_coverage do title.
# Load the first 1000 records of the summary file. read.csv2() uses ";" as
# the field separator by default; decimals are forced to "." and strings are
# kept as character vectors. The contiguous column range
# blob_coverage..title is dropped right after loading.
all_data <- read.csv2(
  file = "all_summary.csv",
  header = TRUE,
  dec = ".",
  nrows = 1000,
  stringsAsFactors = FALSE
) %>%
  select(-(blob_coverage:title))
Wiersze, które w kolumnie “res_name” zawierają niepożądaną przez nas wartość zostają usunięte.
# res_name values that must not take part in the analysis.
unwanted_res_names <- c(
  "UNK", "UNX", "UNL", "DUM", "N", "BLOB",
  "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
  "LEU", "LYS", "MET", "MSE", "PHE", "PRO", "SEC", "SER", "THR", "TRP",
  "TYR", "VAL",
  "DA", "DG", "DT", "DC", "DU",
  "A", "G", "T", "C", "U",
  "HOH", "H20", "WAT"
)

# Keep only the rows whose res_name is not on the exclusion list.
cleaned_data <- all_data %>%
  filter(!(res_name %in% unwanted_res_names))
W zbiorze danych występuje kolumna, która równa jest NA we wszystkich wierszach.
# Select the rows in which weight_col actually holds a value and report how
# many such rows exist.
rows_without_na_in_weight_co <- cleaned_data %>%
  filter(!is.na(weight_col))
nrow(rows_without_na_in_weight_co)
## [1] 0
Jak widać, nie ma wierszy, w których kolumna weight_col ma wartość niepustą, dlatego możemy ją wykluczyć z dalszej analizy.
# Remove weight_col from the cleaned data set.
cleaned_data_without_empty_col <- cleaned_data %>%
  select(-weight_col)
Zebrane dane zawierają 408 kolumn oraz 1000 wierszy. Dane są typu character, integer oraz numeric. Większość kolumn jest numeryczna. Ich podstawowe statystyki prezentują się tak:
Natomiast pozostałe kolumny są następujące
# Render summary() of the character-typed columns as a table.
cleaned_data_without_empty_col %>%
  select_if(is.character) %>%
  summary() %>%
  knitr::kable()
| pdb_code | res_name | chain_id | skeleton_data | fo_col | fc_col | |
|---|---|---|---|---|---|---|
| Length:1000 | Length:1000 | Length:1000 | Length:1000 | Length:1000 | Length:1000 | |
| Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | |
| Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character |
Naszą analizę ograniczymy do 50 najpopularniejszych wartości kolumny res_name
# Count how often each res_name occurs and keep the 50 most frequent values.
# The counts are taken from cleaned_data_without_empty_col so that the
# all-NA weight_col, already excluded above, does not re-enter the analysis.
# count(res_name) returns an ungrouped result, so no grouping leaks into
# later steps.
top_50_res_name <- cleaned_data_without_empty_col %>%
  count(res_name) %>%
  arrange(desc(n)) %>%
  head(50)

# Restrict the data set to rows whose res_name is among those 50 values.
data_with_most_common_res_names <- cleaned_data_without_empty_col %>%
  filter(res_name %in% top_50_res_name$res_name)
Rozkład jej wartości prezentuje się następująco
W celu sprawdzenia korelacji użyjemy współczynnika korelacji rang Spearmana (rho), ponieważ rozkład wartości przynajmniej jednej kolumny nie jest rozkładem normalnym.
# Spearman correlation between all numeric columns (rows with any missing
# value are left out via use = "complete.obs"), reshaped from a square matrix
# into a long data frame with one row per column pair (Var1, Var2, Freq).
correlation <- data_with_most_common_res_names %>%
  select_if(is.numeric) %>%
  cor(use = "complete.obs", method = "spearman") %>%
  as.table() %>%
  as.data.frame()
## Warning in cor(data_with_most_common_res_names %>% select_if(is.numeric), :
## odchylenie standardowe wynosi zero
Usuniemy teraz korelacje kolumn samych ze sobą
# Discard the pairs that relate a column to itself, then give the three
# columns descriptive names.
correlation <- correlation %>%
  filter(Var1 != Var2) %>%
  rename(
    first_column = Var1,
    second_column = Var2,
    freq = Freq
  )
Grupujemy po pierwszej kolumnie oraz dla każdej jej wartości obliczamy maksymalną wartość korelacji. Następnie wyznaczamy 10 kolumns z największą korelacją oraz filtrujemy dane do wizualizacji.
# For every column take its highest correlation with any other column
# (ignoring NA entries) and keep the ten columns with the largest maxima.
top_correlated <- correlation %>%
  group_by(first_column) %>%
  summarise(max = max(freq, na.rm = TRUE)) %>%
  arrange(desc(max)) %>%
  head(10)

# Keep only the pairs in which both members belong to those ten columns.
top_column_names <- top_correlated$first_column
correlation <- correlation %>%
  filter(
    first_column %in% top_column_names,
    second_column %in% top_column_names
  )
# Density plots of every column whose name contains "part_01", drawn as
# 3 x 3 facet pages with the per-column mean marked by a dashed line.

# Long format: one row per (column name, value) pair.
part_01_all <- data_with_most_common_res_names %>%
  select(contains("part_01")) %>%
  gather("key", "value")

# Number of 9-facet pages needed to show every column.
n_pages <- ceiling(length(unique(part_01_all$key)) / 9)

# na.rm = TRUE is required here: the columns contain missing values, and
# without it the mean of such a column is NA, so ggplot silently drops its
# mean line and label ("Removed ... rows containing missing values").
continuous_means <- part_01_all %>%
  group_by(key) %>%
  summarise(mean_value = mean(value, na.rm = TRUE))

for (i in seq_len(n_pages)) {
  print(
    ggplot(part_01_all, aes(value, fill = 1)) +
      # Missing values cannot enter the density estimate; na.rm = TRUE drops
      # them without emitting a warning on every page.
      geom_density(show.legend = FALSE, na.rm = TRUE) +
      geom_vline(
        data = continuous_means,
        aes(xintercept = mean_value),
        linetype = "dashed"
      ) +
      # Label with three significant digits so the text stays readable.
      geom_text(
        data = continuous_means,
        aes(label = signif(mean_value, 3), y = 1, x = mean_value)
      ) +
      facet_wrap_paginate(~ key, ncol = 3, nrow = 3, scales = "free", page = i) +
      theme_minimal()
  )
}
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).
# Build the long-format data behind the animation: segment counts of the
# shape and the density for parts 00, 01 and 02, keyed by res_name.
animation_data <- data_with_most_common_res_names %>%
  select(
    res_name,
    part_00_shape_segments_count, part_00_density_segments_count,
    part_01_shape_segments_count, part_01_density_segments_count,
    part_02_shape_segments_count, part_02_density_segments_count
  ) %>%
  gather("key", "value", -res_name) %>%
  # separate() splits on non-alphanumeric characters by default, so a key such
  # as "part_00_shape_segments_count" yields "part", "00", "shape", ... Only
  # the first three pieces are needed; extra = "drop" states that explicitly
  # instead of relying on the default, which discards them with a warning.
  separate(key, into = c("name", "part_number", "abc"), extra = "drop") %>%
  # "00"/"01"/"02" -> 1/2/3
  mutate(part_number = as.integer(part_number) + 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 4920 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# Jittered scatter of the segment counts, coloured by res_name and animated
# over part_number; the title shows the rounded current frame time.
p <- ggplot(animation_data, aes(abc, value, colour = res_name)) +
  geom_point(position = "jitter") +
  transition_time(part_number) +
  theme_bw() +
  labs(title = "{round(frame_time)}")

# Render a non-looping 600 x 600 GIF. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
animate(
  p,
  nframes = 150,
  fps = 30,
  width = 600,
  height = 600,
  renderer = gifski_renderer(loop = FALSE)
)
# Spearman correlation matrix of all numeric columns; rows containing any
# missing value are excluded (use = "complete.obs").
correlation <- data_with_most_common_res_names %>%
  select_if(is.numeric) %>%
  cor(use = "complete.obs", method = "spearman")
## Warning in cor(data_with_most_common_res_names %>% select_if(is.numeric), :
## odchylenie standardowe wynosi zero
# Convert the correlation matrix to a data frame and display it.
correlation <- as.data.frame(correlation)
correlation
# Modelling data: the contiguous column range from
# local_res_atom_non_h_electron_sum (the outcome used below) through
# local_res_atom_S_count.
electron_count_predict_data <- data_with_most_common_res_names %>%
  select(local_res_atom_non_h_electron_sum:local_res_atom_S_count)
electron_count_predict_data

# Row indices of a 70% training partition drawn on the outcome column.
# list = FALSE returns the indices as a matrix rather than a list; FALSE is
# spelled out because `F` is an ordinary variable that can be reassigned.
indexes <- createDataPartition(
  electron_count_predict_data$local_res_atom_non_h_electron_sum,
  p = 0.7,
  list = FALSE
)
indexes
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 6
## [5,] 7
## [6,] 8
## [7,] 10
## [8,] 11
## [9,] 14
## [10,] 17
## [11,] 18
## [12,] 19
## [13,] 20
## [14,] 21
## [15,] 23
## [16,] 26
## [17,] 28
## [18,] 29
## [19,] 31
## [20,] 32
## [21,] 33
## [22,] 34
## [23,] 35
## [24,] 36
## [25,] 37
## [26,] 39
## [27,] 40
## [28,] 41
## [29,] 42
## [30,] 43
## [31,] 45
## [32,] 47
## [33,] 48
## [34,] 49
## [35,] 50
## [36,] 51
## [37,] 52
## [38,] 53
## [39,] 54
## [40,] 56
## [41,] 58
## [42,] 59
## [43,] 64
## [44,] 66
## [45,] 68
## [46,] 72
## [47,] 74
## [48,] 75
## [49,] 76
## [50,] 77
## [51,] 79
## [52,] 80
## [53,] 81
## [54,] 85
## [55,] 86
## [56,] 88
## [57,] 90
## [58,] 91
## [59,] 94
## [60,] 95
## [61,] 97
## [62,] 99
## [63,] 100
## [64,] 101
## [65,] 104
## [66,] 105
## [67,] 106
## [68,] 107
## [69,] 109
## [70,] 110
## [71,] 112
## [72,] 113
## [73,] 115
## [74,] 116
## [75,] 117
## [76,] 118
## [77,] 120
## [78,] 121
## [79,] 122
## [80,] 124
## [81,] 125
## [82,] 127
## [83,] 128
## [84,] 129
## [85,] 130
## [86,] 131
## [87,] 132
## [88,] 134
## [89,] 135
## [90,] 136
## [91,] 137
## [92,] 138
## [93,] 140
## [94,] 141
## [95,] 143
## [96,] 145
## [97,] 146
## [98,] 147
## [99,] 148
## [100,] 149
## [101,] 150
## [102,] 151
## [103,] 152
## [104,] 154
## [105,] 155
## [106,] 156
## [107,] 157
## [108,] 161
## [109,] 163
## [110,] 164
## [111,] 165
## [112,] 167
## [113,] 168
## [114,] 169
## [115,] 171
## [116,] 172
## [117,] 173
## [118,] 174
## [119,] 175
## [120,] 177
## [121,] 178
## [122,] 179
## [123,] 180
## [124,] 181
## [125,] 183
## [126,] 184
## [127,] 185
## [128,] 187
## [129,] 188
## [130,] 190
## [131,] 191
## [132,] 192
## [133,] 193
## [134,] 194
## [135,] 195
## [136,] 197
## [137,] 198
## [138,] 200
## [139,] 201
## [140,] 203
## [141,] 204
## [142,] 205
## [143,] 206
## [144,] 207
## [145,] 209
## [146,] 210
## [147,] 211
## [148,] 212
## [149,] 213
## [150,] 214
## [151,] 215
## [152,] 216
## [153,] 217
## [154,] 220
## [155,] 221
## [156,] 222
## [157,] 225
## [158,] 227
## [159,] 228
## [160,] 229
## [161,] 230
## [162,] 231
## [163,] 232
## [164,] 233
## [165,] 234
## [166,] 235
## [167,] 236
## [168,] 239
## [169,] 240
## [170,] 243
## [171,] 244
## [172,] 246
## [173,] 247
## [174,] 249
## [175,] 252
## [176,] 253
## [177,] 254
## [178,] 255
## [179,] 256
## [180,] 257
## [181,] 261
## [182,] 263
## [183,] 264
## [184,] 265
## [185,] 267
## [186,] 268
## [187,] 269
## [188,] 270
## [189,] 272
## [190,] 273
## [191,] 274
## [192,] 277
## [193,] 278
## [194,] 281
## [195,] 283
## [196,] 284
## [197,] 285
## [198,] 287
## [199,] 288
## [200,] 289
## [201,] 292
## [202,] 293
## [203,] 297
## [204,] 298
## [205,] 301
## [206,] 302
## [207,] 303
## [208,] 304
## [209,] 305
## [210,] 306
## [211,] 307
## [212,] 308
## [213,] 309
## [214,] 310
## [215,] 311
## [216,] 314
## [217,] 316
## [218,] 318
## [219,] 321
## [220,] 322
## [221,] 323
## [222,] 324
## [223,] 325
## [224,] 326
## [225,] 328
## [226,] 329
## [227,] 331
## [228,] 332
## [229,] 333
## [230,] 334
## [231,] 335
## [232,] 336
## [233,] 338
## [234,] 339
## [235,] 340
## [236,] 341
## [237,] 342
## [238,] 343
## [239,] 345
## [240,] 347
## [241,] 348
## [242,] 349
## [243,] 350
## [244,] 351
## [245,] 352
## [246,] 353
## [247,] 354
## [248,] 357
## [249,] 359
## [250,] 360
## [251,] 362
## [252,] 363
## [253,] 364
## [254,] 365
## [255,] 366
## [256,] 367
## [257,] 369
## [258,] 370
## [259,] 371
## [260,] 372
## [261,] 373
## [262,] 375
## [263,] 377
## [264,] 378
## [265,] 379
## [266,] 380
## [267,] 381
## [268,] 382
## [269,] 383
## [270,] 384
## [271,] 385
## [272,] 386
## [273,] 387
## [274,] 388
## [275,] 389
## [276,] 391
## [277,] 392
## [278,] 396
## [279,] 397
## [280,] 398
## [281,] 399
## [282,] 400
## [283,] 401
## [284,] 402
## [285,] 403
## [286,] 404
## [287,] 405
## [288,] 406
## [289,] 407
## [290,] 409
## [291,] 411
## [292,] 412
## [293,] 413
## [294,] 414
## [295,] 415
## [296,] 416
## [297,] 418
## [298,] 419
## [299,] 422
## [300,] 423
## [301,] 424
## [302,] 425
## [303,] 426
## [304,] 427
## [305,] 428
## [306,] 429
## [307,] 430
## [308,] 433
## [309,] 435
## [310,] 436
## [311,] 437
## [312,] 439
## [313,] 440
## [314,] 441
## [315,] 443
## [316,] 444
## [317,] 445
## [318,] 446
## [319,] 448
## [320,] 450
## [321,] 451
## [322,] 452
## [323,] 453
## [324,] 455
## [325,] 457
## [326,] 460
## [327,] 463
## [328,] 464
## [329,] 465
## [330,] 468
## [331,] 469
## [332,] 470
## [333,] 472
## [334,] 473
## [335,] 474
## [336,] 475
## [337,] 477
## [338,] 479
## [339,] 480
## [340,] 482
## [341,] 483
## [342,] 484
## [343,] 485
## [344,] 486
## [345,] 487
## [346,] 488
## [347,] 489
## [348,] 490
## [349,] 491
## [350,] 492
## [351,] 493
## [352,] 494
## [353,] 497
## [354,] 498
## [355,] 499
## [356,] 500
## [357,] 501
## [358,] 505
## [359,] 508
## [360,] 509
## [361,] 510
## [362,] 511
## [363,] 512
## [364,] 514
## [365,] 515
## [366,] 516
## [367,] 517
## [368,] 518
## [369,] 519
## [370,] 520
## [371,] 522
## [372,] 523
## [373,] 524
## [374,] 525
## [375,] 527
## [376,] 528
## [377,] 529
## [378,] 531
## [379,] 533
## [380,] 534
## [381,] 535
## [382,] 536
## [383,] 537
## [384,] 538
## [385,] 539
## [386,] 540
## [387,] 541
## [388,] 543
## [389,] 544
## [390,] 545
## [391,] 548
## [392,] 551
## [393,] 553
## [394,] 554
## [395,] 557
## [396,] 558
## [397,] 559
## [398,] 560
## [399,] 561
## [400,] 564
## [401,] 567
## [402,] 568
## [403,] 569
## [404,] 570
## [405,] 575
## [406,] 576
## [407,] 578
## [408,] 580
## [409,] 581
## [410,] 582
## [411,] 584
## [412,] 585
## [413,] 586
## [414,] 588
## [415,] 589
## [416,] 590
## [417,] 591
## [418,] 593
## [419,] 595
## [420,] 597
## [421,] 598
## [422,] 599
## [423,] 601
## [424,] 602
## [425,] 603
## [426,] 604
## [427,] 607
## [428,] 608
## [429,] 609
## [430,] 610
## [431,] 611
## [432,] 613
## [433,] 614
## [434,] 615
## [435,] 616
## [436,] 617
## [437,] 618
## [438,] 619
## [439,] 621
## [440,] 622
## [441,] 623
## [442,] 624
## [443,] 625
## [444,] 626
## [445,] 627
## [446,] 629
## [447,] 630
## [448,] 632
## [449,] 633
## [450,] 634
## [451,] 636
## [452,] 638
## [453,] 639
## [454,] 642
## [455,] 643
## [456,] 645
## [457,] 647
## [458,] 648
## [459,] 649
## [460,] 650
## [461,] 651
## [462,] 652
## [463,] 653
## [464,] 654
## [465,] 655
## [466,] 656
## [467,] 658
## [468,] 659
## [469,] 660
## [470,] 661
## [471,] 662
## [472,] 663
## [473,] 665
## [474,] 666
## [475,] 667
## [476,] 668
## [477,] 670
## [478,] 671
## [479,] 675
## [480,] 676
## [481,] 678
## [482,] 679
## [483,] 680
## [484,] 681
## [485,] 683
## [486,] 687
## [487,] 688
## [488,] 689
## [489,] 690
## [490,] 691
## [491,] 693
## [492,] 695
## [493,] 698
## [494,] 699
## [495,] 700
## [496,] 703
## [497,] 704
## [498,] 705
## [499,] 706
## [500,] 707
## [501,] 708
## [502,] 712
## [503,] 714
## [504,] 716
## [505,] 719
## [506,] 720
## [507,] 721
## [508,] 722
## [509,] 724
## [510,] 726
## [511,] 727
## [512,] 730
## [513,] 733
## [514,] 734
## [515,] 735
## [516,] 737
## [517,] 738
## [518,] 740
## [519,] 741
## [520,] 742
## [521,] 744
## [522,] 745
## [523,] 747
## [524,] 748
## [525,] 749
## [526,] 750
## [527,] 751
## [528,] 752
## [529,] 753
## [530,] 754
## [531,] 755
## [532,] 758
## [533,] 759
## [534,] 761
## [535,] 762
## [536,] 763
## [537,] 764
## [538,] 766
## [539,] 768
## [540,] 769
## [541,] 770
## [542,] 773
## [543,] 774
## [544,] 775
## [545,] 776
## [546,] 777
## [547,] 778
## [548,] 779
## [549,] 781
## [550,] 782
## [551,] 785
## [552,] 786
## [553,] 787
## [554,] 788
## [555,] 789
## [556,] 790
## [557,] 791
## [558,] 792
## [559,] 793
## [560,] 795
## [561,] 797
## [562,] 798
## [563,] 800
## [564,] 802
## [565,] 803
## [566,] 804
## [567,] 805
## [568,] 806
## [569,] 809
## [570,] 810
## [571,] 811
## [572,] 812
## [573,] 813
## [574,] 816
## [575,] 817
## [576,] 819
# Split the modelling data: the partition rows form the training set, all
# remaining rows form the test set. Both sets are then displayed.
training_data <- electron_count_predict_data[indexes, , drop = FALSE]
testing_data <- electron_count_predict_data[-indexes, , drop = FALSE]

training_data
testing_data
# Resampling scheme: 2-fold cross-validation repeated 5 times.
ctrl <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

# Linear regression of local_res_atom_non_h_electron_sum on every other
# column of the training set, evaluated with the scheme above.
electron_sum_formula <- local_res_atom_non_h_electron_sum ~ .
fit <- train(
  electron_sum_formula,
  data = training_data,
  method = "lm",
  trControl = ctrl
)

fit
## Linear Regression
##
## 576 samples
## 5 predictor
##
## No pre-processing
## Resampling: Cross-Validated (2 fold, repeated 5 times)
## Summary of sample sizes: 288, 288, 289, 287, 288, 288, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 16.70894 0.9717743 11.17189
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Apply the fitted model to the held-out rows and show the predictions.
predicted_values <- fit %>%
  predict(newdata = testing_data)
predicted_values
## 4 5 9 12 13 15 16
## 98.21186 98.21186 55.98714 55.98714 55.98714 55.98714 55.98714
## 22 24 25 27 30 38 44
## 55.98714 53.05014 53.05014 53.05014 55.98714 50.96603 337.64707
## 46 55 57 60 61 62 63
## 150.83809 223.48021 38.20415 38.20415 38.20415 223.48021 18.30966
## 65 67 69 70 71 73 78
## 38.20415 38.13889 37.38832 53.09909 15.12790 55.98714 38.20415
## 82 83 84 87 89 92 93
## 42.96658 42.96658 42.96658 55.98714 18.79916 200.00072 42.96658
## 96 98 102 103 108 111 114
## 55.98714 50.96603 378.13846 105.07187 15.73978 105.07187 18.79916
## 119 123 126 133 139 142 144
## 50.96603 38.20415 38.20415 337.64707 290.55235 290.55235 290.55235
## 153 158 159 160 162 166 170
## 290.55235 290.55235 290.55235 290.55235 290.55235 290.55235 16.55561
## 176 182 186 189 196 199 202
## 290.55235 290.55235 290.55235 290.55235 290.55235 290.55235 88.56163
## 208 218 219 223 224 226 237
## 88.56163 88.56163 88.56163 88.56163 88.56163 88.56163 187.94058
## 238 241 242 245 248 250 251
## 187.94058 187.94058 50.96603 50.96603 50.96603 50.96603 18.79916
## 258 259 260 262 266 271 275
## 17.98332 17.98332 17.98332 14.61801 38.17355 50.96603 55.98714
## 276 279 280 282 286 290 291
## 53.53964 53.53964 53.53964 53.53964 53.53964 233.77025 233.77025
## 294 295 296 299 300 312 313
## 233.77025 233.77025 233.77025 233.77025 233.77025 15.12790 15.12790
## 315 317 319 320 327 330 337
## 15.12790 15.12790 15.12790 14.31207 15.12790 14.31207 50.96603
## 344 346 355 356 358 361 368
## 105.07187 105.07187 105.07187 105.07187 105.07187 105.07187 13.93678
## 374 376 390 393 394 395 408
## 82.01466 82.01466 16.75957 96.05836 96.05836 96.05836 15.12790
## 410 417 420 421 431 432 434
## 56.92121 290.55235 290.55235 290.55235 55.98714 55.98714 38.17355
## 438 442 447 449 454 456 458
## 230.28691 42.96658 314.16758 16.14770 378.13846 50.96603 15.73978
## 459 461 462 466 467 471 476
## 50.96603 50.96603 55.98714 38.20415 18.79916 38.20415 89.25166
## 478 481 495 496 502 503 504
## 186.90827 55.98714 18.79916 18.79916 18.79916 16.75957 16.75957
## 506 507 513 521 526 530 532
## 204.00123 204.00123 223.69770 38.20415 16.55561 290.55235 290.55235
## 542 546 547 549 550 552 555
## 290.55235 290.55235 290.55235 290.55235 290.55235 105.07187 105.07187
## 556 562 563 565 566 571 572
## 105.07187 105.07187 105.07187 105.07187 55.98714 55.98714 55.98714
## 573 574 577 579 583 587 592
## 55.98714 55.98714 55.98714 55.98714 55.98714 105.07187 55.98714
## 594 596 600 605 606 612 620
## 55.98714 55.98714 96.05836 47.47739 16.55561 16.55561 50.96603
## 628 631 635 637 640 641 644
## 17.77937 16.75957 50.96603 50.96603 50.96603 50.96603 16.75957
## 646 657 664 669 672 673 674
## 55.98714 94.35531 94.35531 305.11182 241.60232 241.60232 241.60232
## 677 682 684 685 686 692 694
## 241.60232 241.60232 16.55561 105.07187 105.07187 56.92121 105.07187
## 696 697 701 702 709 710 711
## 56.92121 16.14770 105.07187 56.92121 84.14251 60.32181 66.27698
## 713 715 717 718 723 725 728
## 66.27698 16.14770 60.32181 36.50110 16.14770 60.32181 72.23216
## 729 731 732 736 739 743 746
## 96.05286 15.98453 16.75957 14.33246 24.10209 24.10209 16.55561
## 756 757 760 765 767 771 772
## 48.73880 38.20415 327.12194 76.48979 76.48979 50.96603 14.56498
## 780 783 784 794 796 799 801
## 21.49141 17.57541 22.47042 187.45726 20.51241 176.52124 191.63842
## 807 808 814 815 818 820
## 18.79916 18.79916 50.96603 50.96603 38.20415 55.98714
# Root mean squared error on the test set. caret declares RMSE(pred, obs);
# the arguments are named so predictions and observed values land in the
# documented slots instead of being passed in swapped positional order.
RMSE(
  pred = predicted_values,
  obs = testing_data$local_res_atom_non_h_electron_sum
)
## [1] 17.49885
# R-squared on the test set. caret declares R2(pred, obs, formula = "corr");
# the arguments are named so predictions and observed values land in the
# documented slots. The default "corr" formula is symmetric, but a swapped
# positional order would give a wrong value with formula = "traditional".
R2(
  pred = predicted_values,
  obs = testing_data$local_res_atom_non_h_electron_sum
)
## [1] 0.9691841